This tutorial demonstrates how to use CoCo-ST (Compare and Contrast Spatial Transcriptomics) to identify both high-variance and low-variance spatial domains in Visium spot data. We will walk through data loading, normalization, contrastive learning, spatial domain detection, and deconvolution using scRNA-seq references.
We first load all dependencies required for spatial analysis, visualization, and deconvolution.
# Load required libraries
library(Seurat)
library(ggplot2)
library(patchwork)
library(dplyr)
library(kernlab)
library(EnhancedVolcano)
library(CoCoST) # Or source("Codes/CoCoST_function.R") if local
library(Matrix)
library(spacexr)
library(slingshot)
# Increase memory limit for large datasets
options(future.globals.maxSize = 2 * 1024^6) # 2 GBWe load background (normal) and foreground (tumor) Visium datasets
# Load background (normal) Visium data
normalTissue <- Load10X_Spatial(
data.dir = "Y:/Projects/FM_ST/Data/Visium/Raw/53430/outs",
filename = "filtered_feature_bc_matrix.h5",
assay = "Spatial"
)
normalTissue <- SCTransform(normalTissue, assay = "Spatial", verbose = FALSE)
# Load foreground (tumor) Visium data
abnormalTissue <- Load10X_Spatial(
data.dir = "Y:/Projects/FM_ST/Data/Visium/Raw/53433/outs",
filename = "filtered_feature_bc_matrix.h5",
assay = "Spatial"
)
abnormalTissue <- SCTransform(abnormalTissue, assay = "Spatial", verbose = FALSE)Visualize the total number of detected molecules (UMIs) across the tissue to confirm that the spatial data were loaded correctly.
# Visualize total counts
SpatialFeaturePlot(abnormalTissue, pt.size.factor = 3, features = "nCount_Spatial") +
ggtitle("Foreground (Tumor) Spot Counts")We extract SCT-normalized expression data for both tumor and normal tissues. These matrices will be used as inputs for CoCo-ST.
We build similarity (affinity) matrices between spots using the Laplacian kernel, which captures local relationships in spatial gene expression. Note, there are different ways of constructing such matrices
Apply CoCo-ST to extract contrastive components that highlight structures unique to the tumor foreground relative to the normal tissue background.
# Set parameters
para <- 0.1 # contrastive parameter
Dim <- 20 # number of contrastive components
# Run CoCoST
CoCoModel <- CoCoST(t(fdata), Wf, t(bdata), Wb, para, Dim)
# Create Seurat reduction object
CoCo <- CreateDimReducObject(
embeddings = CoCoModel[["fgComponents"]],
loadings = CoCoModel[["projMatrix"]],
key = "CoCoST_",
assay = "SCT"
)
# Add CoCoST components to Seurat object
rownames(CoCo@feature.loadings) <- rownames(fdata)
abnormalTissue@reductions[["CoCoST"]] <- CoCoWe embed CoCo-ST features into UMAP space, identify spatial domains using graph-based clustering, and visualize the detected spatial domains.
# Run UMAP and clustering based on CoCoST embeddings
abnormalTissue <- RunUMAP(abnormalTissue, reduction = "CoCoST", dims = 1:10,
n.components = 5, reduction.name = "CoCo_UMAP")
abnormalTissue <- FindNeighbors(abnormalTissue, reduction = "CoCo_UMAP", dims = 1:5)
abnormalTissue <- FindClusters(abnormalTissue, verbose = TRUE, resolution = 0.05)## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3511
## Number of edges: 80095
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9811
## Number of communities: 6
## Elapsed time: 0 seconds
Display UMAP and spatial domain plots, showing how CoCo-ST separates spatially coherent regions.
cols <- c("#E1BD6D","deepskyblue1","#7A3A9A","#ED0000FF","#0B775E","#ff00ff","#7B556C","#44B05B")
DimPlot(abnormalTissue, reduction = "CoCo_UMAP", label = FALSE, cols = cols)# Visualize spatial domains
SpatialDimPlot(abnormalTissue, label = FALSE, label.size = 3, group.by = "coco_clusters", pt.size.factor = 3) +
scale_fill_manual(values=cols) +
ggtitle("CoCoST Spatial Domains")Assign interpretable names to spatial domains (SD1–SD6) and replot.
abnormalTissue <- RenameIdents(object = abnormalTissue, `0` = "SD 1", `1` = "SD 3", `2` = "SD 2", `3` = "SD 4",
`4` = "SD 5", `5` = "SD 6")
SpatialDimPlot(abnormalTissue, label = FALSE, label.size = 3, pt.size.factor = 3) +
scale_fill_manual(values=cols)Display the top contrastive components (CoCoST_1–CoCoST_5) to explore spatially varying patterns.
SpatialFeaturePlot(abnormalTissue, features = c("CoCoST_1", "CoCoST_2", "CoCoST_3", "CoCoST_4", "CoCoST_5"), ncol = 5,
pt.size.factor = 3)Next, we use RCTD to infer the dominant cell types within each spatial domain using a scRNA-seq reference.
# perform deconvolution
ref <- readRDS("Y:/Projects/FM_ST/Data/129S4_Major_CellTypes.rds")
ref <- UpdateSeuratObject(ref)
subref <- subset(ref, model == "Urethane")
Idents(subref) <- "Major_CellType"
subref@meta.data[["Major_CellType"]] <- factor(subref@meta.data[["Major_CellType"]],
levels = c("Endothelial cells","Epithelial cells","Fibroblasts",
"Macrophages","cDC","Prolif_Macro","B cells","T cells",
"Proliferating T cells","pDC","Neutrophils","Plasma cells",
"Monocytes","NK cells"))Display the UMAP of reference scRNA-seq cell types for interpretability.
# Plot scRNA-seq celltypes (umap)
col2 = c("plum1","tomato","#762A83","deepskyblue1","#C4961A","#ff00ff","#DC0000FF","#4E84C4","chartreuse3",
"#D16103","#58593FFF","lightblue1","#068105","yellow","#4E2A1E","#C3D7A4","black")
DimPlot(subref, group.by = "Major_CellType", label = F, cols = col2)Use RCTD to deconvolve each Visium spot into its most probable cell type compositions.
# extract information to pass to the RCTD Reference function
counts <- subref@assays[["RNA"]]@counts
cluster <- as.factor(subref@meta.data[["Major_CellType"]])
names(cluster) <- colnames(subref)
nUMI <- subref@meta.data[["nCount_RNA"]]
names(nUMI) <- colnames(subref)
reference <- Reference(counts, cluster, nUMI)
# set up query with the RCTD function SpatialRNA
STcounts <- abnormalTissue@assays[["SCT"]]@counts
colnames(STcounts) <- colnames(abnormalTissue)
rownames(STcounts) <- rownames(abnormalTissue)
coords <- GetTissueCoordinates(abnormalTissue)
colnames(coords) <- c("x", "y")
coords[is.na(colnames(coords))] <- NULL
query <- SpatialRNA(coords, STcounts, colSums(STcounts))We now run RCTD
##
## Endothelial cells Epithelial cells Fibroblasts
## 7989 10000 9730
## Macrophages cDC Prolif_Macro
## 10000 2315 452
## B cells T cells Proliferating T cells
## 3469 6585 159
## pDC Neutrophils Plasma cells
## 178 2434 117
## Monocytes NK cells
## 3409 757
## [1] "gather_results: finished 1000"
## [1] "gather_results: finished 2000"
## [1] "gather_results: finished 3000"
abnormalTissue <- AddMetaData(abnormalTissue, metadata = RCTD@results$results_df)
abnormalTissue@meta.data[["second_type"]] <- factor(abnormalTissue@meta.data[["second_type"]],
levels = c("Endothelial cells","Epithelial cells","Fibroblasts",
"Macrophages","cDC","Prolif_Macro","B cells","T cells",
"Proliferating T cells","pDC","Neutrophils","Plasma cells",
"Monocytes","NK cells"))
abnormalTissue@meta.data[["first_type"]] <- factor(abnormalTissue@meta.data[["first_type"]],
levels = c("Endothelial cells","Epithelial cells","Fibroblasts",
"Macrophages","cDC","Prolif_Macro","B cells","T cells",
"Proliferating T cells","pDC","Neutrophils","Plasma cells",
"Monocytes","NK cells"))Finally, visualize dominant and secondary cell type distributions across the spatial tissue using RCTD outputs
SpatialDimPlot(abnormalTissue, pt.size.factor = 3, group.by = "first_type") + scale_fill_manual(values=col2)+
theme(
legend.text = element_text(size = 12),
legend.title = element_text(size = 12)
) +
guides(
fill = guide_legend(override.aes = list(size = 3)), # for filled shapes
color = guide_legend(override.aes = list(size = 3)) # for colored points
)SpatialDimPlot(abnormalTissue, pt.size.factor = 3, group.by = "second_type") + scale_fill_manual(values=col2)+
theme(
legend.text = element_text(size = 12),
legend.title = element_text(size = 12)
) +
guides(
fill = guide_legend(override.aes = list(size = 3)), # for filled shapes
color = guide_legend(override.aes = list(size = 3)) # for colored points
)
## Trajectory Inference (tissue-wise) Build SingleCellExperiment using
CoCoST contrastive components and CoCo-ST clusters. Then run Slingshot
using the normal region (cluster “0”) as the start.
# perform trajectory analysis (tissue wise)
sim <- SingleCellExperiment(assays = STcounts)
reducedDims(sim) <- SimpleList(DRM = CoCoModel[["fgComponents"]])
colData(sim)$clusterlabel <- factor(abnormalTissue@meta.data[["coco_clusters"]])
sim <- slingshot(sim, clusterLabels = 'clusterlabel', reducedDim = 'DRM', start.clus="0") Below is a helper function to plot trajectories on tissue sample The function divides tissue into grids, computes pseudotime direction per grid, and visualizes trajectories as arrows over pseudotime and cluster maps.
plotTrajectory = function(pseudotime, location, clusterlabels, gridnum,
color_in, pointsize = 5, arrowlength = 0.4,
arrowsize = 0.8, textsize = 22) {
pseudotime_use = pseudotime
info = as.data.frame(location)
colnames(info) = c("sdimx","sdimy")
grids = gridnum
min_x = min(info$sdimx)
min_y = min(info$sdimy)
max_x = max(info$sdimx)
max_y = max(info$sdimy)
# grid anchors
x_anchor = seq(min_x, max_x, length.out = grids + 1)
y_anchor = seq(min_y, max_y, length.out = grids + 1)
# store start/end coordinates
start_x_dat = c()
start_y_dat = c()
end_x_dat = c()
end_y_dat = c()
count = 0
for(num_x in 1:grids){
for(num_y in 1:grids){
filter_x = which(info$sdimx >= x_anchor[num_x] & info$sdimx <= x_anchor[num_x+1])
filter_y = which(info$sdimy >= y_anchor[num_y] & info$sdimy <= y_anchor[num_y+1])
points_in_grid = intersect(filter_x, filter_y)
if(length(points_in_grid) > 1){
count = count + 1
min_point = info[points_in_grid[which.min(pseudotime_use[points_in_grid])], ]
max_point = info[points_in_grid[which.max(pseudotime_use[points_in_grid])], ]
start_x_dat[count] = min_point$sdimx
start_y_dat[count] = min_point$sdimy
end_x_dat[count] = max_point$sdimx
end_y_dat[count] = max_point$sdimy
}
}
}
loc1 = info$sdimx
loc2 = info$sdimy
time = pseudotime_use + 0.01
# === Pseudotime scatter ===
datt = data.frame(time, loc1, loc2)
p01 = ggplot(datt, aes(x = loc1, y = loc2, color = time)) +
geom_point(alpha = 1, size = pointsize) +
scale_color_gradientn(colours = c("red", "green")) +
theme_void() +
theme(plot.title = element_text(size = textsize, face = "bold"),
text = element_text(size = textsize),
legend.position = "bottom")
# Arrow style
arrow_style <- arrow(length = unit(arrowlength,"cm"),
type = "closed", ends = "last")
datt2 = data.frame(start_x_dat, start_y_dat, end_x_dat, end_y_dat)
# Arrows only
p02 = ggplot(datt2) +
geom_segment(aes(x = start_x_dat, y = start_y_dat,
xend = end_x_dat, yend = end_y_dat),
arrow = arrow_style, size = arrowsize,
color = "grey20") +
theme_void()
p03 = ggplot(datt2) +
geom_segment(aes(x = end_x_dat, y = end_y_dat,
xend = start_x_dat, yend = start_y_dat),
arrow = arrow_style, size = arrowsize,
color = "grey20") +
theme_void()
# Clusters + arrows
datt1 = data.frame(time, loc1, loc2, clusterlabels = clusterlabels)
p1 = ggplot(datt1, aes(x = loc1, y = loc2)) +
geom_point(alpha = 1, size = pointsize, aes(color = clusterlabels)) +
scale_colour_manual(values = color_in) +
theme_void() +
theme(plot.title = element_text(size = textsize, face = "bold"),
text = element_text(size = textsize),
legend.position = "bottom")
p22 = p1 +
geom_segment(data = datt2,
aes(x = start_x_dat, y = start_y_dat,
xend = end_x_dat, yend = end_y_dat),
arrow = arrow_style, size = arrowsize,
color = "grey20")
p33 = p1 +
geom_segment(data = datt2,
aes(x = end_x_dat, y = end_y_dat,
xend = start_x_dat, yend = start_y_dat),
arrow = arrow_style, size = arrowsize,
color = "grey20")
return(list("Pseudotime" = p01,
"Arrowplot1" = p02,
"Arrowplot2" = p03,
"Arrowoverlay1" = p22,
"Arrowoverlay2" = p33))
}Plot Tissue Trajectories—- Extract spatial coordinates and pseudotime, then generate pseudotime and arrow-overlaid trajectory plots.
flocation <- GetTissueCoordinates(abnormalTissue)
pseudotime_traj1 = sim@colData@listData$slingPseudotime_1
gridnum = 10
color_in = c("#E1BD6D","deepskyblue1","#7A3A9A","#ED0000FF","#0B775E","#ff00ff","black")
p_traj1 = plotTrajectory(pseudotime_traj1, flocation, abnormalTissue@meta.data[["coco_clusters"]], gridnum,
cols, pointsize=2.5 ,arrowlength=0.2, arrowsize=1, textsize=15)
p_traj1$Arrowoverlay1Visualization of pseudotime across the tissue revealed a clear spatial gradient of cellular progression. The Adenoma region (SD 5) exhibited higher pseudotime values, indicating that cells within this area are more differentiated and represent a later stage of tissue evolution compared to surrounding normal or transitional structures.